info_healthy) и пациентах с СРК
(info_ibs)info_healthy <- read_xlsx ("data/raw/final_health_statistic.xlsx")
info_ibs <- read_xlsx ("data/raw/final_ibs_141_statistic.xlsx")
info_healthy и
info_ibs## Уникальные переменные в info_ibs:
## Main_Disease
## Weight_kg
## Height_cm
## BMI
## Diet_type
## Diet_duration
## Additive_usage
## Уникальные переменные в info_healthy:
## Drugs
info_healthy и info_ibs в
единый датасет combined_info c редактированием
содержимогоcombined_info <- info_healthy %>%
bind_rows(info_ibs) %>%
mutate (
BMI_min = ifelse (is.na (BMI_min), round (Weight_min /(Height_max/100 * Height_max/100), 2), BMI_min),
BMI_max = ifelse (is.na (BMI_max), round (Weight_max /(Height_min/100 * Height_min/100), 2), BMI_max)
) %>%
unite("BMI_range", BMI_min, BMI_max, sep = "-", na.rm = TRUE) %>%
unite ("Age_range", Age_min, Age_max, sep = "-", na.rm = TRUE) %>%
mutate(
Age_range = case_when(
Age_range == "18-40" | Age_range == "23-28" | Age_range == "16-42" | Age_range == "21-43" ~ "16-43",
Age <= 43 ~ "16-43",
Age > 43 ~ "> 43",
TRUE ~ NA_character_), #удалена группа 28-54
research_ID = sub ("research_", "", research_ID),
research_ID = case_when(
research_ID == 0 ~ 1,
research_ID == 1 ~ 2,
research_ID == 2 ~ 3,
research_ID == 3 ~ 4,
research_ID == 4 ~ 5,
research_ID == 6 ~ 6,
research_ID == 7 ~ 7),
patient_ID = row_number(),
Sex = ifelse (Sex == "mixed", NA, Sex),
Smoking = sub ("never", "Never", Smoking),
Smoking = case_when(
Smoking == "No" ~ "No",
Smoking == "Never" ~ "No",
Smoking == "Rarely (a few times/month)" ~ "Yes", #5 чел.
Smoking == "Occasionally (1-2 times/week)" ~ "Yes", #3 чел.
Smoking == "Regularly (3-5 times/week)" ~ "Yes", #1 чел.
Smoking == "Daily" ~ "Yes"), #7 чел.
Alcohol = sub ("rarely", "Rarely", Alcohol),
Alcohol = ifelse(Alcohol == "Regularly (3-5 times/week)"|
Alcohol == "Daily", #3 чел.
"Regularly (3-7 times/week)", Alcohol),
Antibiotics_usage = case_when(
Antibiotics_usage == "Month" | Antibiotics_usage == ~ "3 months" |
Antibiotics_usage == "6 months" ~ "1-6 months",
#2 чел. больных Month, 0 чел. здоровых 3 months, 0 чел. здоровых для 6 months
Antibiotics_usage == "Year" | Antibiotics_usage == "Not use" ~
"12 months/Not use"), # 0 чел. из здоровых для 6-12 months, 0 чел. больных для Not use
Hygiene = case_when(
Hygiene == "Occasionally (1-2 times/week) cosmetics" ~ "Occasionally cosmetics (1-2 times/week)",
Hygiene == "Rarely (a few times/month) cosmetics" ~ "Rarely cosmetics (a few times/month)",
TRUE ~ Hygiene),
Hygiene = case_when(
Hygiene == "Daily cosmetics"|Hygiene == "Regularly cosmetics (3-5 times/week)" ~ "Regularly (3-7 times/week)", #0 чел. больных для 3-5 times/week
Hygiene == "Occasionally cosmetics (1-2 times/week)" | Hygiene == "Rarely cosmetics (a few times/month)" ~ "Occasionally (a few-8 times/month)",
#только 2 чел. больных для 1-2 times/week
Hygiene == "Never cosmetics" ~ "Never"),
Physical_activity = sub ("regularly", "Regularly", Smoking),
BMI = ifelse (is.na(Weight_kg), BMI, Weight_kg/ (Height_cm/100 * Height_cm/100)),
BMI_range = ifelse(BMI_range == "", NA, BMI_range),
BMI_category = case_when(
BMI_range == "18-25" ~ "normal/overweight",
BMI_range == "19.21-29.29" ~ "normal/overweight",
BMI_range == "20.6-29.6" ~ "normal/overweight",
BMI_range == "21.74-28.38" ~ "normal/overweight",
BMI_range == "18.5-30.8" ~ "normal/overweight", #немного больше 30
BMI < 18.5 ~ "underweight",
BMI >= 18.5 & BMI < 30 ~ "normal/overweight",
BMI >= 30 ~ "obese")
) %>%
rename ("Cosmetics" = Hygiene ) %>%
mutate_if (is.character, as.factor) %>%
select(-c(
Instrument, # unique (combined_info$Instrument) = "Illumina MiSeq"
Isolation_source, # unique (combined_info$Isolation_source) = "faeces"
Assay_type, # unique (combined_info$Assay_type) = "AMPLICON"
Target_gene, # unique (combined_info$Target_gene) = "16S"
Main_Disease, # unique (combined_info$Main_Disease) = NA (for healthy), 141 (for ibs)
Drugs, # unique (combined_info$Drugs) = NA
Social_status, # unique (combined_info$Social_status) = NA, urban
Weight_kg, Height_cm, Weight_min, Weight_max, Height_min, Height_max, BMI_range, #использованы для создания BMI_category, уменьшения количества NA в BMI
BMI, # NA у всех здоровых
Birth_Year, # имеются значения только в тех случаях, где возраст уже известен
Pets_type # только cat и NA
))
rm (info_healthy, info_ibs)
summary (combined_info)
## research_ID patient_ID Seq_region Seq_date Health_state
## Min. :1.000 Min. : 1 V3-V4:106 Min. :2015 Disease:170
## 1st Qu.:3.000 1st Qu.: 96 V4 :229 1st Qu.:2017 Health :211
## Median :4.000 Median :191 V5-V6: 46 Median :2018
## Mean :4.194 Mean :191 Mean :2019
## 3rd Qu.:6.000 3rd Qu.:286 3rd Qu.:2021
## Max. :7.000 Max. :381 Max. :2023
##
## Age Age_range Sex Country
## Min. :19.00 > 43 : 44 female: 79 Spain :95
## 1st Qu.:27.25 16-43:242 male :112 Poland :70
## Median :34.00 NA's : 95 NA's :190 Belgium:46
## Mean :38.71 Germany:45
## 3rd Qu.:47.00 Italy :37
## Max. :76.00 Austria:28
## NA's :247 (Other):60
## Race Smoking Alcohol
## African American : 1 No :187 Never : 12
## Asian or Pacific Islander: 1 Yes : 16 Occasionally (1-2 times/week): 24
## Caucasian :116 NA's:178 Rarely (a few times/month) : 36
## Hispanic : 1 Regularly (3-7 times/week) : 15
## Other : 11 NA's :294
## NA's :251
##
## Antibiotics_usage Physical_activity Travel_period
## 1-6 months : 48 No :187 3 months: 18
## 12 months/Not use:129 Yes : 16 6 months: 10
## NA's :204 NA's:178 Month : 13
## Year : 45
## NA's :295
##
##
## Education_level Cosmetics
## Bachelor's level: 31 Never : 44
## Master's level : 28 Occasionally (a few-8 times/month): 17
## School Level : 25 Regularly (3-7 times/week) : 25
## NA's :297 NA's :295
##
##
##
## Sleep_duration Diet_type Diet_duration Additive_usage
## > 8 hours: 9 10.0: 13 3.0 : 8 22.0: 1
## 4-6 hours: 9 5.0 : 5 6.0 : 5 23.0: 1
## 6-7 hours: 31 NA's:363 NA's:368 4.0 : 9
## 7-8 hours: 38 NA's:370
## NA's :294
##
##
## BMI_category
## normal/overweight:287
## obese : 5
## underweight : 1
## NA's : 88
##
##
##
combined_info %>%
select (research_ID, Country, Seq_date, Health_state) %>%
group_by(research_ID, Country, Seq_date, Health_state) %>%
summarise(n = n()) %>%
flextable() %>% theme_box() %>%
merge_v(c ("research_ID", "Country", "Seq_date", "Health_state")) %>%
align(align = "center", part = "all")
research_ID | Country | Seq_date | Health_state | n |
|---|---|---|---|---|
1 | Belgium | 2,018 | Health | 46 |
2 | Italy | 36 | ||
3 | Poland | 2,023 | 70 | |
4 | Australia | 2,021 | 6 | |
Austria | 2,020 | 3 | ||
Germany | 33 | |||
2,021 | 12 | |||
Greece | 1 | |||
Hungary | 2 | |||
Ireland | Disease | 1 | ||
Israel | Health | 1 | ||
Italy | 1 | |||
Norway | 2,018 | Disease | 1 | |
UK | 7 | |||
2,020 | 2 | |||
2,021 | 3 | |||
USA | 2,018 | 6 | ||
2,020 | 1 | |||
2,021 | 5 | |||
2,022 | 1 | |||
2,023 | 1 | |||
5 | Austria | 2,017 | 25 | |
6 | Israel | 2,022 | 22 | |
7 | Spain | 2,015 | 95 |
library(gtsummary)
combined_info %>%
select(! c(patient_ID,
Diet_duration, Additive_usage, Diet_type #у всех здоровых данные переменные = NA
)) %>%
tbl_summary(digits = list(all_continuous() ~ c(0, 0),
all_categorical() ~ c(0, 0)),
by = Health_state) %>%
add_p()
| Characteristic | Disease, N = 1701 | Health, N = 2111 | p-value2 |
|---|---|---|---|
| research_ID | <0.001 | ||
| 1 | 0 (0%) | 46 (22%) | |
| 2 | 0 (0%) | 36 (17%) | |
| 3 | 0 (0%) | 70 (33%) | |
| 4 | 28 (16%) | 59 (28%) | |
| 5 | 25 (15%) | 0 (0%) | |
| 6 | 22 (13%) | 0 (0%) | |
| 7 | 95 (56%) | 0 (0%) | |
| Seq_region | <0.001 | ||
| V3-V4 | 0 (0%) | 106 (50%) | |
| V4 | 170 (100%) | 59 (28%) | |
| V5-V6 | 0 (0%) | 46 (22%) | |
| Seq_date | <0.001 | ||
| 2015 | 95 (56%) | 0 (0%) | |
| 2017 | 25 (15%) | 0 (0%) | |
| 2018 | 14 (8%) | 82 (39%) | |
| 2020 | 3 (2%) | 36 (17%) | |
| 2021 | 9 (5%) | 23 (11%) | |
| 2022 | 23 (14%) | 0 (0%) | |
| 2023 | 1 (1%) | 70 (33%) | |
| Age | 42 (32, 50) | 28 (26, 37) | <0.001 |
| Unknown | 95 | 152 | |
| Age_range | <0.001 | ||
| > 43 | 34 (45%) | 10 (5%) | |
| 16-43 | 41 (55%) | 201 (95%) | |
| Unknown | 95 | 0 | |
| Sex | 0.15 | ||
| female | 35 (48%) | 44 (37%) | |
| male | 38 (52%) | 74 (63%) | |
| Unknown | 97 | 93 | |
| Country | |||
| Australia | 0 (0%) | 6 (3%) | |
| Austria | 25 (15%) | 3 (1%) | |
| Belgium | 0 (0%) | 46 (22%) | |
| Germany | 0 (0%) | 45 (21%) | |
| Greece | 0 (0%) | 1 (0%) | |
| Hungary | 0 (0%) | 2 (1%) | |
| Ireland | 1 (1%) | 0 (0%) | |
| Israel | 22 (13%) | 1 (0%) | |
| Italy | 0 (0%) | 37 (18%) | |
| Norway | 1 (1%) | 0 (0%) | |
| Poland | 0 (0%) | 70 (33%) | |
| Spain | 95 (56%) | 0 (0%) | |
| UK | 12 (7%) | 0 (0%) | |
| USA | 14 (8%) | 0 (0%) | |
| Race | 0.4 | ||
| African American | 0 (0%) | 1 (1%) | |
| Asian or Pacific Islander | 0 (0%) | 1 (1%) | |
| Caucasian | 27 (100%) | 89 (86%) | |
| Hispanic | 0 (0%) | 1 (1%) | |
| Other | 0 (0%) | 11 (11%) | |
| Unknown | 143 | 108 | |
| Smoking | 4 (14%) | 12 (7%) | 0.2 |
| Unknown | 142 | 36 | |
| Alcohol | 0.002 | ||
| Never | 4 (14%) | 8 (14%) | |
| Occasionally (1-2 times/week) | 4 (14%) | 20 (34%) | |
| Rarely (a few times/month) | 9 (32%) | 27 (46%) | |
| Regularly (3-7 times/week) | 11 (39%) | 4 (7%) | |
| Unknown | 142 | 152 | |
| Antibiotics_usage | 0.020 | ||
| 1-6 months | 2 (8%) | 46 (30%) | |
| 12 months/Not use | 23 (92%) | 106 (70%) | |
| Unknown | 145 | 59 | |
| Physical_activity | 4 (14%) | 12 (7%) | 0.2 |
| Unknown | 142 | 36 | |
| Travel_period | 0.066 | ||
| 3 months | 3 (11%) | 15 (26%) | |
| 6 months | 1 (4%) | 9 (16%) | |
| Month | 4 (14%) | 9 (16%) | |
| Year | 20 (71%) | 25 (43%) | |
| Unknown | 142 | 153 | |
| Education_level | <0.001 | ||
| Bachelor's level | 5 (18%) | 26 (46%) | |
| Master's level | 18 (64%) | 10 (18%) | |
| School Level | 5 (18%) | 20 (36%) | |
| Unknown | 142 | 155 | |
| Cosmetics | 0.6 | ||
| Never | 15 (56%) | 29 (49%) | |
| Occasionally (a few-8 times/month) | 6 (22%) | 11 (19%) | |
| Regularly (3-7 times/week) | 6 (22%) | 19 (32%) | |
| Unknown | 143 | 152 | |
| Sleep_duration | 0.2 | ||
| > 8 hours | 4 (14%) | 5 (8%) | |
| 4-6 hours | 4 (14%) | 5 (8%) | |
| 6-7 hours | 12 (43%) | 19 (32%) | |
| 7-8 hours | 8 (29%) | 30 (51%) | |
| Unknown | 142 | 152 | |
| BMI_category | 0.012 | ||
| normal/overweight | 135 (96%) | 152 (100%) | |
| obese | 5 (4%) | 0 (0%) | |
| underweight | 1 (1%) | 0 (0%) | |
| Unknown | 29 | 59 | |
| 1 n (%); Median (IQR) | |||
| 2 Pearson’s Chi-squared test; Wilcoxon rank sum test; Fisher’s exact test | |||
bacteria_healthy) и пациентов с СРК
(bacteria_ibs) и их объединение в
combined_bacteriabacteria_healthy <- read_csv("data/raw/final_bacteria_health.csv")
bacteria_ibs <- read_csv("data/raw/final_bacteria_ibs_141.csv")
combined_bacteria <- bacteria_healthy %>%
bind_rows (bacteria_ibs) %>%
mutate(patient_ID = row_number())
rm (bacteria_healthy, bacteria_ibs)
combined_bacteria и
combined_info в data_widedata_wide <- combined_info %>%
left_join (combined_bacteria)
data_widedata_wide %>%
select (where (function(x) sum (is.na(x))/ nrow(data_wide) * 100 > 0)) %>%
sapply (function(x) sum (is.na(x))/ nrow(data_wide) * 100) %>% round(1) %>%
as.data.frame() %>%
rename(NA_percentage = ".") %>%
mutate (
"Number of people with known data" = round (nrow(data_wide) - NA_percentage/100 * nrow(data_wide)),
NA_percentage = paste (NA_percentage, "%", sep = " ")
) %>%
arrange(desc (NA_percentage)) %>%
rownames_to_column() %>%
as_tibble() %>% flextable()
rowname | NA_percentage | Number of people with known data |
|---|---|---|
Additive_usage | 97.1 % | 11 |
Diet_duration | 96.6 % | 13 |
Diet_type | 95.3 % | 18 |
Education_level | 78 % | 84 |
Travel_period | 77.4 % | 86 |
Cosmetics | 77.4 % | 86 |
Alcohol | 77.2 % | 87 |
Sleep_duration | 77.2 % | 87 |
Race | 65.9 % | 130 |
Age | 64.8 % | 134 |
Antibiotics_usage | 53.5 % | 177 |
Sex | 49.9 % | 191 |
Smoking | 46.7 % | 203 |
Physical_activity | 46.7 % | 203 |
Age_range | 24.9 % | 286 |
BMI_category | 23.1 % | 293 |
# Устанавливаем порог процента
threshold_percent <- 95
# Функция для вычисления процента записей, не равных NA и не равных 0, для каждой колонки
calculate_percentage <- function(col) {
sum(!is.na(col) & col != 0) / length(col) * 100
}
# Применяем функцию к каждой колонке в датасете
percentage_non_zero_non_na <- sapply(data_wide[, -1], calculate_percentage)
# Создаем датафрейм с результатами
result_df_sort <- data.frame(
column = names(percentage_non_zero_non_na),
percentage = round(100 - percentage_non_zero_non_na, 2) # percentage означает пропущенные или 0 значения
) %>%
arrange(desc(percentage))
# Отфильтровываем колонки, у которых процент записей менее threshold_percent%
filtered_columns <- result_df_sort[result_df_sort$percentage < threshold_percent, ]
# Сохраним датасет в excel для дальнейшего анализа
write.xlsx(filtered_columns,
file = "data/originals/percentage_by_vars.xlsx")
# Перезапись data_wide с выбором колонок с процентом NA/0 менее threshold_percent
data_wide <- data_wide %>%
select (row.names(filtered_columns), research_ID)
rm (calculate_percentage, result_df_sort)
patient_ID# Устанавливаем порог процента
threshold_percent <- 95
# Рассчитываем процент значений, не являющихся NA и не равных 0, для каждого пациента
percentage_non_zero_non_na <- rowMeans(!is.na(data_wide) & data_wide != 0, na.rm = TRUE) * 100
# Создаем датафрейм с результатами
result_df_sort <- data.frame(
patient_id = data_wide$patient_ID,
percentage = round(100 - percentage_non_zero_non_na, 2) # percentage означает пропущенные или 0 значения
) %>% arrange(desc(percentage))
# Отфильтровываем пациентов, у которых процент значений менее threshold_percent%
filtered_patients <- result_df_sort[result_df_sort$percentage < threshold_percent, ]
# Сохраним датасет в excel для дальнейшего анализа
write.xlsx(filtered_patients,
file = "data/originals/percentage_by_patient.xlsx")
# Перезапись data_wide с удалением строк с процентом NA/0 более threshold_percent
data_wide <- data_wide %>%
slice (filtered_patients$patient_id) #при threshold_percent = 95%, изменения data_wide не происходит, так как нет пациентов с процентом NA/0 более 95%
rm (percentage_non_zero_non_na, result_df_sort)
data_wide после удаления колонок и строк с
процентом NA/0 более threshold_percentdata_wide <- data_wide %>%
select (patient_ID, any_of (colnames(combined_info)), everything()) %>%
arrange(patient_ID)
write_rds(data_wide,
file = "data/originals/data_wide.rds")
G_only_one <- data_wide %>%
select (research_ID, ends_with("_G")) %>%
add_column(n = 1) %>% #колонка для дальнейшего подсчёта количества субъектов в исследовании
group_by(research_ID) %>%
summarise_each (sum) %>%
select (research_ID, n, where (function(x) sum (x!=0) == 1)) %>% #выбираем таксоны, которые обнаружены только в одном исследовании
mutate (across(-c(1:2),
function(x) x/n))
## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `across()` instead.
## ℹ The deprecated feature was likely used in the dplyr package.
## Please report the issue at <https://github.com/tidyverse/dplyr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
G_only_one
## # A tibble: 7 × 15
## research_ID n CM1G08_G Cladosporium_G Lentimonas_G Micromonospora_G
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 46 0 0 0.0500 0.0331
## 2 2 36 0 0.00811 0 0
## 3 3 70 0 0 0 0
## 4 4 87 0.00550 0 0 0
## 5 5 25 0 0 0 0
## 6 6 22 0 0 0 0
## 7 7 95 0 0 0 0
## # ℹ 9 more variables: Pseudosphingobacterium_G <dbl>,
## # `Schizothrix LEGE 07164_G` <dbl>, Talaromyces_G <dbl>,
## # `Rs-D38 termite group_G` <dbl>, Kabatiella_G <dbl>,
## # `Anaerolineaceae UCG-001_G` <dbl>, Iamia_G <dbl>, Thiohalocapsa_G <dbl>,
## # Ellin517_G <dbl>
G_taxon <- 'Ellin517_G'
res_ID <- 7
data_wide %>%
select (research_ID, G_taxon) %>%
filter (research_ID == res_ID) %>%
summarise(across (-1,
function (x) sum (x==0)/nrow (.) * 100)) %>%
rename ("Taxon_zero_percentage, %" = G_taxon)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(G_taxon)
##
## # Now:
## data %>% select(all_of(G_taxon))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## # A tibble: 1 × 1
## `Taxon_zero_percentage, %`
## <dbl>
## 1 0
# CM1G08_G - 73,6% нулей
# Cladosporium_G - 94,0% нулей
# Lentimonas_G - 94,0% нулей
# Micromonospora_G - 93,2% нулей
# Pseudosphingobacterium_G - 93,2% нулей
# Schizothrix LEGE 07164_G - 92,7% нулей
# Talaromyces_G - 92,7% нулей
# Rs-D38 termite group_G - 91,9% нулей
# Kabatiella_G - 90,6% нулей
# Anaerolineaceae UCG-001_G - 89,5% нулей
# Iamia_G - 88,2% нулей
# Thiohalocapsa_G - 87,9% нулей
# Ellin517_G - 75,1% нулей
rm (G_taxon, res_ID)
data_wide %>%
select (research_ID, ends_with("_F")) %>%
add_column(n = 1) %>% #колонка для дальнейшего подсчёта количества субъектов в исследовании
group_by(research_ID) %>%
summarise_each (sum) %>%
select (research_ID, n, where (function(x) sum (x!=0) == 1)) %>% #выбираем таксоны, которые обнаружены только в одном исследовании
mutate (across(-c(1:2),
function(x) x/n))
## # A tibble: 7 × 9
## research_ID n Didymellaceae_F Cladosporiaceae_F Trichocomaceae_F
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 46 0 0 0
## 2 2 36 0.0213 0.00811 0.0143
## 3 3 70 0 0 0
## 4 4 87 0 0 0
## 5 5 25 0 0 0
## 6 6 22 0 0 0
## 7 7 95 0 0 0
## # ℹ 4 more variables: `type III_F` <dbl>, `09D2Z48_F` <dbl>,
## # Aureobasidiaceae_F <dbl>, Iamiaceae_F <dbl>
F_taxon <- 'Iamiaceae_F'
res_ID <- 1
data_wide %>%
select (research_ID, F_taxon) %>%
filter (research_ID == res_ID) %>%
summarise(across (-1,
function (x) sum (x==0)/nrow (.) * 100)) %>%
rename ("Taxon_zero_percentage, %" = F_taxon)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(F_taxon)
##
## # Now:
## data %>% select(all_of(F_taxon))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## # A tibble: 1 × 1
## `Taxon_zero_percentage, %`
## <dbl>
## 1 2.17
# Didymellaceae_F - 44,4% нулей
# Cladosporiaceae_F - 36,1% нулей
# Trichocomaceae_F - 22,2% нулей
# type III_F - 30,4% нулей
# 09D2Z48_F - 23,9% нулей
# Aureobasidiaceae_F - 0% нулей
# Iamiaceae_F - 2,2% нулей
rm (F_taxon, res_ID)
data_wide %>%
select (research_ID, ends_with("_O")) %>%
add_column(n = 1) %>% #колонка для дальнейшего подсчёта количества субъектов в исследовании
group_by(research_ID) %>%
summarise_each (sum) %>%
select (research_ID, n, where (function(x) sum (x!=0) == 1)) %>% #выбираем таксоны, которые обнаружены только в одном исследовании
mutate (across(-c(1:2),
function(x) x/n))
## # A tibble: 7 × 12
## research_ID n Hypocreales_O Pleosporales_O `Candidatus Abawacabacteria_O`
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 46 0 0 0.00892
## 2 2 36 0.0622 0.0367 0
## 3 3 70 0 0 0
## 4 4 87 0 0 0
## 5 5 25 0 0 0
## 6 6 22 0 0 0
## 7 7 95 0 0 0
## # ℹ 7 more variables: `Candidatus Peregrinibacteria_O` <dbl>,
## # Capnodiales_O <dbl>, `Candidatus Terrybacteria_O` <dbl>,
## # Eurotiales_O <dbl>, Dothideales_O <dbl>, eub62A3_O <dbl>,
## # `LD1-PA32_O` <dbl>
O_taxon <- 'Candidatus Abawacabacteria_O'
res_ID <- 1
data_wide %>%
select (research_ID, O_taxon) %>%
filter (research_ID == res_ID) %>%
summarise(across (-1,
function (x) sum (x==0)/nrow (.) * 100)) %>%
rename ("Taxon_zero_percentage, %" = O_taxon)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(O_taxon)
##
## # Now:
## data %>% select(all_of(O_taxon))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## # A tibble: 1 × 1
## `Taxon_zero_percentage, %`
## <dbl>
## 1 52.2
# Hypocreales_O - 41,7% нулей
# Pleosporales_O - 41,7% нулей
# Candidatus Abawacabacteria_O - 52,2% нулей
# Candidatus Peregrinibacteria_O - 52,2% нулей
# Capnodiales_O - 33,3% нулей
# Candidatus Terrybacteria_O - 39,1% нулей
# Eurotiales_O - 39,1% нулей
# Dothideales_O - 0% нулей
# eub62A3_O - 15,2% нулей
# LD1-PA32_O - 2,2% нулей
rm (O_taxon, res_ID)
data_wide %>%
select (research_ID, ends_with("_C")) %>%
add_column(n = 1) %>% #колонка для дальнейшего подсчёта количества субъектов в исследовании
group_by(research_ID) %>%
summarise_each (sum) %>%
select (research_ID, n, where (function(x) sum (x!=0) == 1)) %>% #выбираем таксоны, которые обнаружены только в одном исследовании
mutate (across(-c(1:2),
function(x) x/n))
## # A tibble: 7 × 5
## research_ID n WWE3_C Eurotiomycetes_C Dothideomycetes_C
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 46 0 0 0
## 2 2 36 0 0.0229 0.102
## 3 3 70 0 0 0
## 4 4 87 0.0135 0 0
## 5 5 25 0 0 0
## 6 6 22 0 0 0
## 7 7 95 0 0 0
С_taxon <- 'Dothideomycetes_C'
res_ID <- 2
data_wide %>%
select (research_ID, С_taxon) %>%
filter (research_ID == res_ID) %>%
summarise(across (-1,
function (x) sum (x==0)/nrow (.) * 100)) %>%
rename ("Taxon_zero_percentage, %" = С_taxon)
## # A tibble: 1 × 1
## `Taxon_zero_percentage, %`
## <dbl>
## 1 0
# WWE3_C - 74,4% нулей
# Eurotiomycetes_C - 19,4% нулей
# Dothideomycetes_C - 0% нулей
rm (С_taxon, res_ID)
P_таксонов, которые были обнаружены только в одном исследовании, нет
`
data_wide <- data_wide %>%
select (!colnames (G_only_one) [- c (1,2)]) #удаление G_таксонов, которые обнаружены только в одном исследовании (все они в этом исследовании встречались редко - до 25% людей)
bac_functions# Чтение листов Excel-файла с функциями бактерий и их объединение
path <- "data/raw/Bacterial group functions.xlsx"
taxon <- c ("TaxonName", "Rank")
neuromediators <- read_xlsx (path, 2) %>%
mutate(Destroy = ifelse(is.na (Destroy), "produce", "destroy")) %>%
unique() %>%
pivot_wider(names_from = Neuromediator, values_from = Destroy)
probiotics <- read_xlsx (path, 3) %>%
add_column(probiotics = 1)
special_properties <- read_xlsx (path, 4) %>%
add_column(special_properties = 1)
vitamins <- read_xlsx (path, 5) %>%
pivot_wider (names_from = Vitamin, values_from = Vitamin,
values_fn = function(x) ifelse(is.na (x), NA, 1))
habbits <- read_xlsx (path, 7) %>%
unique() %>% #удаление повторяющихся строк
pivot_wider(names_from = Habbit, values_from = Habit_state)
bac_functions <- read_xlsx (path, 1) %>% #Патогены и нежелательные
full_join(neuromediators, by = taxon) %>% #Нейромедиаторы
full_join(probiotics, by = taxon) %>% #Пробиотики
full_join(special_properties, by = taxon) %>% #С особыми свойствами
full_join(vitamins, by = taxon) %>% #Витамины
full_join(read_xlsx (path, 6), by = taxon) %>% #Продуценты КЦДК
full_join(habbits, by = taxon) %>% #Вредные привычки
unite("Taxon", TaxonName, Rank, sep = "_") %>%
filter (Taxon != "Blautia obeum_S") %>% #для данного таксона противоречивая информация в Продуценты КЦЖК
mutate_all(as.factor)
rm (path, taxon, neuromediators, probiotics, special_properties, vitamins, habbits)
data_wide в длинный формат
(data_long), объединение data_long и
bac_functions# Создание датасета в длинном формате
data_long <- data_wide %>%
pivot_longer(ends_with(c("_D", "_P", "_O", "_C", "_F", "_G")),
names_to = "Taxon", values_to = "Percentage")
#Перезапись data_long с добавлением функций бактерий
data_long <- data_long %>%
left_join (bac_functions, by = "Taxon")
#Сохранение data_long.rds
write_rds(data_long,
file = "data/originals/data_long.rds",
compress = "gz")
G_long <- data_long %>% subset(grepl("_G", Taxon))
# %>% select (where (function(x) sum (is.na(x))/ nrow(.) * 100 < threshold_percent))
F_long <- data_long %>% subset(grepl("_F", Taxon))
C_long <- data_long %>% subset(grepl("_C", Taxon))
O_long <- data_long %>% subset(grepl("_O", Taxon))
P_long <- data_long %>% subset(grepl("_P", Taxon))
write_rds(G_long,
file = "data/originals/G_long.rds", "gz")
write_rds(F_long,
file = "data/originals/F_long.rds", "gz")
write_rds(C_long,
file = "data/originals/C_long.rds", "gz")
write_rds(O_long,
file = "data/originals/O_long.rds", "gz")
write_rds(P_long,
file = "data/originals/P_long.rds", "gz")
combined_bacteria с по группе Health_state из
combined_info без учета таксономического уровня.# Создание нового датасета для сохранения результатов
result_dataset <- data.frame(
Variable_Name = character(),
Test_Type = character(),
P_Value = numeric(),
Normal_Distribution = character(),
stringsAsFactors = FALSE
)
alpha = 0.05
# Объединяем датасеты по patient_id
combined_data <- data_wide %>%
select(Health_state,
ends_with(c("_D", "_P", "_O", "_C", "_F", "_G")))
combined_bacteria_clean <- combined_data
# Получаем список переменных из датасета combined_bacteria, исключая "patient_ID"
values_to_exclude <- c("patient_ID", "Seq_date", "Age")
variable_names <- setdiff(names(combined_bacteria_clean), values_to_exclude)
# Проходим по каждой переменной
for (variable in variable_names) {
# Фильтрация данных (исключаем строки с NA и 0 в текущей переменной)
filtered_data <- combined_data[!is.na(combined_data[[variable]]) & combined_data[[variable]] != 0, ]
#print(variable)
#print(filtered_data)
# Проверим что датасет filtered_data не пустой и что количество групп сравнения более 1, в нашем случае их 2 :)
if (nrow(filtered_data) > 0 & length(unique(filtered_data$Health_state)) > 1) {
# Check if there is sufficient variability in the data
if (length(unique(filtered_data$Health_state)) > 1) {
# Проверка на нормальность
shapiro_test_result <- try(shapiro.test(filtered_data[[variable]]), silent = TRUE)
if (inherits(shapiro_test_result, "try-error")) {
warning(paste("Skipping Shapiro-Wilk test for variable", variable, "due to an error."))
next
}
p_value_shapiro <- shapiro_test_result$p.value
# Выбор соответствующего статистического теста
if (p_value_shapiro > 0.05) {
# Если нормальное распределение, провести дисперсионный анализ
model <- aov(filtered_data[[variable]] ~ Health_state, data = filtered_data)
summary_list <- summary(model)
test_type <- "ANOVA"
} else {
# Если не нормальное распределение, использовать тест Краскела-Уоллиса
tryCatch({
model <- kruskal.test(filtered_data[[variable]] ~ Health_state, data = filtered_data)
test_type <- "Kruskal-Wallis"
}, error = function(e) {
warning(paste("Skipping Kruskal-Wallis test for variable", variable, "due to an error:", conditionMessage(e)))
next
})
}
# Добавление результатов выбранного теста в датасет
result_dataset <- rbind(result_dataset,
data.frame(Variable_Name = variable,
Test_Type = test_type,
P_Value = ifelse(test_type == "ANOVA", summary_list[[1]]$"Pr(>F)"[1], model$p.value),
Normal_Distribution = ifelse(p_value_shapiro > 0.05, "Yes", "No")))
} else {
# If there is no variability, skip the tests
warning(paste("Skipping tests for variable", variable, "as there is not enough variability in the data."))
}
}}
## Warning: Skipping Shapiro-Wilk test for variable Health_state due to an error.
# Добавление столбца с поправкой Бонферрони
result_dataset$Adjusted_P_Value_Bonferroni <- p.adjust(result_dataset$P_Value, method = "bonferroni")
# Добавление столбца с поправкой Холма
result_dataset$Adjusted_P_Value_Holm <- p.adjust(result_dataset$P_Value, method = "holm")
# Добавление столбца с поправкой Бенджамини-Хохберга
result_dataset$Adjusted_P_Value_BH <- p.adjust(result_dataset$P_Value, method = "BH")
result_dataset$test_pass = ifelse(result_dataset$Adjusted_P_Value_Bonferroni < alpha & result_dataset$Adjusted_P_Value_Holm < alpha & result_dataset$Adjusted_P_Value_BH < alpha, "Y", "N")
result_dataset_pass <- result_dataset %>% filter(test_pass == "Y")
# Вывод результатов
print(result_dataset_pass)
## Variable_Name Test_Type P_Value
## 1 Eukaryota_D Kruskal-Wallis 3.641890e-07
## 2 Nitrospirota_P Kruskal-Wallis 7.764816e-07
## 3 Armatimonadota_P Kruskal-Wallis 5.555616e-10
## 4 Acidobacteriota_P Kruskal-Wallis 3.112329e-09
## 5 Patescibacteria_P Kruskal-Wallis 1.463219e-15
## 6 Cyanobacteria_P Kruskal-Wallis 6.542425e-06
## 7 Chloroflexi_P Kruskal-Wallis 6.316248e-23
## 8 Verrucomicrobiota_P Kruskal-Wallis 8.425194e-09
## 9 Actinobacteriota_P Kruskal-Wallis 6.600394e-23
## 10 Proteobacteria_P Kruskal-Wallis 2.752566e-06
## 11 Desulfitobacteriales_O Kruskal-Wallis 5.530423e-05
## 12 Pirellulales_O Kruskal-Wallis 3.196572e-09
## 13 Anaerolineales_O Kruskal-Wallis 3.746162e-09
## 14 Xanthomonadales_O Kruskal-Wallis 1.874109e-06
## 15 Pedosphaerales_O Kruskal-Wallis 1.302641e-15
## 16 Acholeplasmatales_O Kruskal-Wallis 1.552118e-08
## 17 SBR1031_O Kruskal-Wallis 3.378675e-19
## 18 Sphingobacteriales_O Kruskal-Wallis 5.724693e-10
## 19 Flavobacteriales_O Kruskal-Wallis 8.296231e-08
## 20 Staphylococcales_O Kruskal-Wallis 2.450391e-21
## 21 Monoglobales_O Kruskal-Wallis 2.153674e-06
## 22 Bifidobacteriales_O Kruskal-Wallis 1.699744e-15
## 23 Chitinophagales_O Kruskal-Wallis 5.210248e-17
## 24 Bacillales_O Kruskal-Wallis 7.441037e-10
## 25 Clostridia vadinBB60 group_O Kruskal-Wallis 9.747928e-08
## 26 Coriobacteriales_O Kruskal-Wallis 4.680932e-22
## 27 Clostridiales_O Kruskal-Wallis 2.333755e-14
## 28 Erysipelotrichales_O Kruskal-Wallis 7.511336e-10
## 29 Lactobacillales_O Kruskal-Wallis 9.824748e-11
## 30 Peptostreptococcales-Tissierellales_O Kruskal-Wallis 8.737004e-17
## 31 Oscillospirales_O Kruskal-Wallis 4.294684e-05
## 32 vadinHA49_C Kruskal-Wallis 4.885983e-06
## 33 Desulfitobacteriia_C Kruskal-Wallis 5.530423e-05
## 34 Microgenomatia_C Kruskal-Wallis 2.221509e-05
## 35 Dehalococcoidia_C Kruskal-Wallis 1.999580e-05
## 36 Phycisphaerae_C Kruskal-Wallis 3.006549e-07
## 37 Planctomycetes_C Kruskal-Wallis 4.398873e-05
## 38 Rhodothermia_C Kruskal-Wallis 5.134815e-09
## 39 Cyanobacteriia_C Kruskal-Wallis 2.599878e-06
## 40 Anaerolineae_C Kruskal-Wallis 1.126492e-19
## 41 Verrucomicrobiae_C Kruskal-Wallis 2.572550e-09
## 42 Coriobacteriia_C Kruskal-Wallis 4.373663e-22
## 43 Actinobacteria_C Kruskal-Wallis 1.748252e-19
## 44 Bacilli_C Kruskal-Wallis 6.906521e-12
## 45 Gammaproteobacteria_C Kruskal-Wallis 2.793530e-05
## 46 Pirellulaceae_F Kruskal-Wallis 3.196572e-09
## 47 Anaerolineaceae_F Kruskal-Wallis 3.746162e-09
## 48 Neisseriaceae_F Kruskal-Wallis 1.969734e-05
## 49 Microscillaceae_F Kruskal-Wallis 3.202746e-09
## 50 Pedosphaeraceae_F Kruskal-Wallis 1.302641e-15
## 51 A4b_F Kruskal-Wallis 5.664201e-18
## 52 Acholeplasmataceae_F Kruskal-Wallis 1.552118e-08
## 53 UCG-011_F Kruskal-Wallis 1.038745e-06
## 54 Selenomonadaceae_F Kruskal-Wallis 3.125086e-06
## 55 [Clostridium] methylpentosum group_F Kruskal-Wallis 2.364971e-18
## 56 Marinilabiliaceae_F Kruskal-Wallis 2.857166e-19
## 57 Flavobacteriaceae_F Kruskal-Wallis 1.274939e-05
## 58 Staphylococcaceae_F Kruskal-Wallis 4.246920e-22
## 59 Porphyromonadaceae_F Kruskal-Wallis 5.124256e-12
## 60 Eggerthellaceae_F Kruskal-Wallis 2.117931e-18
## 61 Barnesiellaceae_F Kruskal-Wallis 2.522551e-07
## 62 Monoglobaceae_F Kruskal-Wallis 2.153674e-06
## 63 Bifidobacteriaceae_F Kruskal-Wallis 1.699744e-15
## 64 Bacillaceae_F Kruskal-Wallis 1.838790e-09
## 65 Hungateiclostridiaceae_F Kruskal-Wallis 1.127118e-12
## 66 Marinifilaceae_F Kruskal-Wallis 7.717807e-11
## 67 Streptococcaceae_F Kruskal-Wallis 6.582098e-14
## 68 Peptostreptococcaceae_F Kruskal-Wallis 1.042119e-16
## 69 Erysipelatoclostridiaceae_F Kruskal-Wallis 3.073290e-13
## 70 Clostridiaceae_F Kruskal-Wallis 9.938201e-16
## 71 Erysipelotrichaceae_F Kruskal-Wallis 1.671406e-05
## 72 Tannerellaceae_F Kruskal-Wallis 4.258468e-05
## 73 [Eubacterium] coprostanoligenes group_F Kruskal-Wallis 2.001438e-06
## 74 Butyricicoccaceae_F Kruskal-Wallis 2.897180e-05
## 75 Muribaculaceae_F Kruskal-Wallis 3.758091e-05
## 76 Ruminococcaceae_F Kruskal-Wallis 1.458702e-08
## 77 Halobacillus_G Kruskal-Wallis 4.880366e-05
## 78 Megamonas_G Kruskal-Wallis 2.480778e-05
## 79 Hespellia_G Kruskal-Wallis 1.148969e-05
## 80 Lachnospiraceae NK4B4 group_G Kruskal-Wallis 3.406312e-05
## 81 XBB1006_G Kruskal-Wallis 4.544741e-06
## 82 Macellibacteroides_G Kruskal-Wallis 8.252963e-10
## 83 Paludibacter_G Kruskal-Wallis 7.121028e-06
## 84 Anaerovorax_G Kruskal-Wallis 3.805548e-08
## 85 Candidatus Phytoplasma_G Kruskal-Wallis 7.466909e-06
## 86 Selenomonas_G Kruskal-Wallis 1.912782e-05
## 87 Lachnospiraceae UCG-009_G Kruskal-Wallis 9.999884e-06
## 88 [Eubacterium] nodatum group_G Kruskal-Wallis 2.465934e-07
## 89 Mogibacterium_G Kruskal-Wallis 4.320301e-05
## 90 Papillibacter_G Kruskal-Wallis 4.897195e-06
## 91 Anaerococcus_G Kruskal-Wallis 3.272729e-12
## 92 CAG-56_G Kruskal-Wallis 9.881041e-07
## 93 Proteiniphilum_G Kruskal-Wallis 4.464993e-13
## 94 Ruminiclostridium_G Kruskal-Wallis 1.142188e-05
## 95 Syntrophococcus_G Kruskal-Wallis 2.488686e-12
## 96 Pseudobutyrivibrio_G Kruskal-Wallis 6.026604e-06
## 97 Butyricimonas_G Kruskal-Wallis 1.196336e-05
## 98 Caproiciproducens_G Kruskal-Wallis 1.550165e-05
## 99 Prevotellaceae UCG-004_G Kruskal-Wallis 4.891415e-14
## 100 UBA1819_G Kruskal-Wallis 1.981127e-19
## 101 Barnesiella_G Kruskal-Wallis 3.908806e-06
## 102 Erysipelotrichaceae UCG-003_G Kruskal-Wallis 6.623150e-10
## 103 Rikenellaceae RC9 gut group_G Kruskal-Wallis 3.864326e-16
## 104 Porphyromonas_G Kruskal-Wallis 1.759612e-14
## 105 Staphylococcus_G Kruskal-Wallis 1.471866e-23
## 106 A2_G Kruskal-Wallis 3.435145e-21
## 107 Lachnospiraceae NC2004 group_G Kruskal-Wallis 2.010754e-07
## 108 Bifidobacterium_G Kruskal-Wallis 1.343449e-14
## 109 Monoglobus_G Kruskal-Wallis 2.153674e-06
## 110 Lachnospiraceae XPB1014 group_G Kruskal-Wallis 8.116506e-06
## 111 Romboutsia_G Kruskal-Wallis 3.566692e-13
## 112 Odoribacter_G Kruskal-Wallis 9.910652e-11
## 113 Acetitomaculum_G Kruskal-Wallis 2.040740e-06
## 114 Tyzzerella_G Kruskal-Wallis 3.609278e-05
## 115 Clostridium sensu stricto 1_G Kruskal-Wallis 2.878502e-17
## 116 Marvinbryantia_G Kruskal-Wallis 1.915920e-11
## 117 NK4A214 group_G Kruskal-Wallis 1.881102e-08
## 118 Lachnospiraceae NK3A20 group_G Kruskal-Wallis 1.638832e-05
## 119 Streptococcus_G Kruskal-Wallis 6.665449e-13
## 120 Fusicatenibacter_G Kruskal-Wallis 1.087316e-07
## 121 Dorea_G Kruskal-Wallis 1.466599e-05
## 122 Anaerostipes_G Kruskal-Wallis 2.997463e-10
## 123 Subdoligranulum_G Kruskal-Wallis 5.234297e-15
## 124 Parabacteroides_G Kruskal-Wallis 3.893385e-05
## 125 [Ruminococcus] torques group_G Kruskal-Wallis 9.216209e-09
## 126 Faecalibacterium_G Kruskal-Wallis 7.571240e-10
## 127 Lachnoclostridium_G Kruskal-Wallis 1.729484e-07
## 128 Lachnospiraceae NK4A136 group_G Kruskal-Wallis 4.135324e-10
## 129 Agathobacter_G Kruskal-Wallis 4.325760e-06
## Normal_Distribution Adjusted_P_Value_Bonferroni Adjusted_P_Value_Holm
## 1 No 3.084681e-04 2.796972e-04
## 2 No 6.576800e-04 5.955614e-04
## 3 No 4.705606e-07 4.438937e-07
## 4 No 2.636142e-06 2.458740e-06
## 5 No 1.239347e-12 1.204229e-12
## 6 No 5.541434e-03 4.900276e-03
## 7 No 5.349862e-20 5.343545e-20
## 8 No 7.136139e-06 6.596927e-06
## 9 No 5.590534e-20 5.577333e-20
## 10 No 2.331423e-03 2.083692e-03
## 11 No 4.684268e-02 3.981905e-02
## 12 No 2.707496e-06 2.522095e-06
## 13 No 3.172999e-06 2.944483e-06
## 14 No 1.587370e-03 1.431819e-03
## 15 No 1.103337e-12 1.074679e-12
## 16 No 1.314644e-05 1.210652e-05
## 17 No 2.861738e-16 2.821194e-16
## 18 No 4.848815e-07 4.568305e-07
## 19 No 7.026907e-05 6.437875e-05
## 20 No 2.075481e-18 2.060779e-18
## 21 No 1.824162e-03 1.638946e-03
## 22 No 1.439683e-12 1.397190e-12
## 23 No 4.413080e-14 4.324506e-14
## 24 No 6.302558e-07 5.923065e-07
## 25 No 8.256495e-05 7.554644e-05
## 26 No 3.964750e-19 3.941345e-19
## 27 No 1.976691e-11 1.906678e-11
## 28 No 6.362102e-07 5.971512e-07
## 29 No 8.321561e-08 7.889272e-08
## 30 No 7.400242e-14 7.242976e-14
## 31 No 3.637598e-02 3.109352e-02
## 32 No 4.138427e-03 3.674259e-03
## 33 No 4.684268e-02 3.981905e-02
## 34 No 1.881618e-02 1.628366e-02
## 35 No 1.693644e-02 1.467692e-02
## 36 No 2.546547e-04 2.312036e-04
## 37 No 3.725845e-02 3.175986e-02
## 38 No 4.349188e-06 4.025695e-06
## 39 No 2.202096e-03 1.970707e-03
## 40 No 9.541384e-17 9.451265e-17
## 41 No 2.178950e-06 2.034887e-06
## 42 No 3.704493e-19 3.686998e-19
## 43 No 1.480770e-16 1.465035e-16
## 44 No 5.849823e-09 5.566656e-09
## 45 No 2.366120e-02 2.042070e-02
## 46 No 2.707496e-06 2.522095e-06
## 47 No 3.172999e-06 2.944483e-06
## 48 No 1.668365e-02 1.447755e-02
## 49 No 2.712726e-06 2.522095e-06
## 50 No 1.103337e-12 1.074679e-12
## 51 No 4.797578e-15 4.712615e-15
## 52 No 1.314644e-05 1.210652e-05
## 53 No 8.798173e-04 7.946401e-04
## 54 No 2.646948e-03 2.362565e-03
## 55 No 2.003131e-15 1.970021e-15
## 56 No 2.420020e-16 2.388591e-16
## 57 No 1.079873e-02 9.447299e-03
## 58 No 3.597141e-19 3.584400e-19
## 59 No 4.340245e-09 4.135275e-09
## 60 No 1.793888e-15 1.766355e-15
## 61 No 2.136601e-04 1.942364e-04
## 62 No 1.824162e-03 1.638946e-03
## 63 No 1.439683e-12 1.397190e-12
## 64 No 1.557456e-06 1.456322e-06
## 65 No 9.546691e-10 9.129658e-10
## 66 No 6.536983e-08 6.205117e-08
## 67 No 5.575037e-11 5.364410e-11
## 68 No 8.826749e-14 8.628746e-14
## 69 No 2.603076e-10 2.501658e-10
## 70 No 8.417656e-13 8.208954e-13
## 71 No 1.415681e-02 1.231826e-02
## 72 No 3.606922e-02 3.087389e-02
## 73 No 1.695218e-03 1.527098e-03
## 74 No 2.453911e-02 2.114941e-02
## 75 No 3.183103e-02 2.732132e-02
## 76 No 1.235521e-05 1.139246e-05
## 77 No 4.133670e-02 3.518744e-02
## 78 No 2.101219e-02 1.815930e-02
## 79 No 9.731768e-03 8.536841e-03
## 80 No 2.885146e-02 2.483201e-02
## 81 No 3.849396e-03 3.422190e-03
## 82 No 6.990259e-07 6.544600e-07
## 83 No 6.031510e-03 5.326529e-03
## 84 No 3.223299e-05 2.956911e-05
## 85 No 6.324472e-03 5.577781e-03
## 86 No 1.620126e-02 1.407808e-02
## 87 No 8.469902e-03 7.449913e-03
## 88 No 2.088646e-04 1.901235e-04
## 89 No 3.659295e-02 3.123578e-02
## 90 No 4.147924e-03 3.677793e-03
## 91 No 2.772002e-09 2.644365e-09
## 92 No 8.369242e-04 7.568877e-04
## 93 No 3.781849e-10 3.625574e-10
## 94 No 9.674336e-03 8.497882e-03
## 95 No 2.107917e-09 2.013347e-09
## 96 No 5.104534e-03 4.519953e-03
## 97 No 1.013297e-02 8.876815e-03
## 98 No 1.312990e-02 1.145572e-02
## 99 No 4.143029e-11 3.991395e-11
## 100 No 1.678015e-16 1.658204e-16
## 101 No 3.310759e-03 2.951148e-03
## 102 No 5.609808e-07 5.278651e-07
## 103 No 3.273084e-13 3.195798e-13
## 104 No 1.490392e-11 1.439363e-11
## 105 No 1.246671e-20 1.246671e-20
## 106 No 2.909568e-18 2.885522e-18
## 107 No 1.703109e-04 1.552302e-04
## 108 No 1.137902e-11 1.100285e-11
## 109 No 1.824162e-03 1.638946e-03
## 110 No 6.874681e-03 6.054913e-03
## 111 No 3.020988e-10 2.899721e-10
## 112 No 8.394322e-08 7.948343e-08
## 113 No 1.728507e-03 1.555044e-03
## 114 No 3.057059e-02 2.627555e-02
## 115 No 2.438091e-14 2.392035e-14
## 116 No 1.622785e-08 1.542316e-08
## 117 No 1.593293e-05 1.463497e-05
## 118 No 1.388090e-02 1.209458e-02
## 119 No 5.645635e-10 5.405679e-10
## 120 No 9.209569e-05 8.415828e-05
## 121 No 1.242210e-02 1.085283e-02
## 122 No 2.538851e-07 2.400967e-07
## 123 No 4.433449e-12 4.292123e-12
## 124 No 3.297697e-02 2.826597e-02
## 125 No 7.806129e-06 7.207076e-06
## 126 No 6.412840e-07 6.011565e-07
## 127 No 1.464873e-04 1.336891e-04
## 128 No 3.502620e-07 3.308260e-07
## 129 No 3.663919e-03 3.261623e-03
## Adjusted_P_Value_BH test_pass
## 1 3.855851e-06 Y
## 2 8.119506e-06 Y
## 3 9.603279e-09 Y
## 4 4.447092e-08 Y
## 5 4.957386e-14 Y
## 6 5.597408e-05 Y
## 7 1.863511e-20 Y
## 8 1.097868e-07 Y
## 9 1.863511e-20 Y
## 10 2.562004e-05 Y
## 11 3.631216e-04 Y
## 12 4.447092e-08 Y
## 13 5.036506e-08 Y
## 14 1.889727e-05 Y
## 15 4.597237e-14 Y
## 16 1.905281e-07 Y
## 17 2.201337e-17 Y
## 18 9.697631e-09 Y
## 19 9.759594e-07 Y
## 20 2.964973e-19 Y
## 21 2.049620e-05 Y
## 22 5.332161e-14 Y
## 23 2.451711e-15 Y
## 24 1.187563e-08 Y
## 25 1.131027e-06 Y
## 26 6.607916e-20 Y
## 27 6.376422e-13 Y
## 28 1.187563e-08 Y
## 29 1.824853e-09 Y
## 30 3.894864e-15 Y
## 31 2.927436e-04 Y
## 32 4.276210e-05 Y
## 33 3.631216e-04 Y
## 34 1.636190e-04 Y
## 35 1.485653e-04 Y
## 36 3.223477e-06 Y
## 37 2.957020e-04 Y
## 38 6.795606e-08 Y
## 39 2.446774e-05 Y
## 40 1.060154e-17 Y
## 41 3.822719e-08 Y
## 42 6.607916e-20 Y
## 43 1.480770e-17 Y
## 44 1.392815e-10 Y
## 45 2.022325e-04 Y
## 46 4.447092e-08 Y
## 47 5.036506e-08 Y
## 48 1.476429e-04 Y
## 49 4.447092e-08 Y
## 50 4.597237e-14 Y
## 51 2.998486e-16 Y
## 52 1.905281e-07 Y
## 53 1.060021e-05 Y
## 54 2.877117e-05 Y
## 55 1.335421e-16 Y
## 56 2.016683e-17 Y
## 57 1.009228e-04 Y
## 58 6.607916e-20 Y
## 59 1.058596e-10 Y
## 60 1.281348e-16 Y
## 61 2.739232e-06 Y
## 62 2.049620e-05 Y
## 63 5.332161e-14 Y
## 64 2.781171e-08 Y
## 65 2.512287e-11 Y
## 66 1.485678e-09 Y
## 67 1.689405e-12 Y
## 68 4.413374e-15 Y
## 69 7.656107e-12 Y
## 70 3.826207e-14 Y
## 71 1.275388e-04 Y
## 72 2.927436e-04 Y
## 73 1.994375e-05 Y
## 74 2.079586e-04 Y
## 75 2.630664e-04 Y
## 76 1.844061e-07 Y
## 77 3.254858e-04 Y
## 78 1.811396e-04 Y
## 79 9.268351e-05 Y
## 80 2.424492e-04 Y
## 81 4.051996e-05 Y
## 82 1.270956e-08 Y
## 83 6.031510e-05 Y
## 84 4.539858e-07 Y
## 85 6.261853e-05 Y
## 86 1.446541e-04 Y
## 87 8.223205e-05 Y
## 88 2.712527e-06 Y
## 89 2.927436e-04 Y
## 90 4.276210e-05 Y
## 91 6.930005e-11 Y
## 92 1.020639e-05 Y
## 93 1.050514e-11 Y
## 94 9.268351e-05 Y
## 95 5.404915e-11 Y
## 96 5.208708e-05 Y
## 97 9.559404e-05 Y
## 98 1.204578e-04 Y
## 99 1.294696e-12 Y
## 100 1.525468e-17 Y
## 101 3.559956e-05 Y
## 102 1.099962e-08 Y
## 103 1.558612e-14 Y
## 104 4.967972e-13 Y
## 105 1.246671e-20 Y
## 106 3.636960e-19 Y
## 107 2.240932e-06 Y
## 108 3.923799e-13 Y
## 109 2.049620e-05 Y
## 110 6.739883e-05 Y
## 111 8.631396e-12 Y
## 112 1.824853e-09 Y
## 113 2.009892e-05 Y
## 114 2.547549e-04 Y
## 115 1.434171e-15 Y
## 116 3.773918e-10 Y
## 117 2.276133e-07 Y
## 118 1.261900e-04 Y
## 119 1.525847e-11 Y
## 120 1.244536e-06 Y
## 121 1.150194e-04 Y
## 122 5.401810e-09 Y
## 123 1.583375e-13 Y
## 124 2.703030e-04 Y
## 125 1.182747e-07 Y
## 126 1.187563e-08 Y
## 127 1.953164e-06 Y
## 128 7.297125e-09 Y
## 129 3.897786e-05 Y
combined_bacteria_G <- combined_bacteria_clean %>%
select(Health_state, ends_with("_G"))
d <- dist(combined_bacteria_G) # euclidean distances between the rows
## Warning in dist(combined_bacteria_G): в результате преобразования созданы NA
fit <- cmdscale(d,eig=TRUE, k=2) # k is the number of dim
df_mds <- data.frame(
x = fit$points[,1],
y = fit$points[,2]
)
df_full <- cbind(df_mds, combined_info) %>% mutate(Health_state_n = case_when(Health_state == "Health" ~ 0,
Health_state == "Disease" ~ 1))
ggplot(df_full, aes(x = x, y = y, color = Health_state)) +
geom_point() +
theme_bw() +
ggtitle("Распределение вектора таксонов в зависимости от группы пациентов")
adonis2(d ~ Health_state_n, data = df_full)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = d ~ Health_state_n, data = df_full)
## Df SumOfSqs R2 F Pr(>F)
## Health_state_n 1 2519 0.02181 8.4505 0.001 ***
## Residual 379 112971 0.97819
## Total 380 115490 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Wilcox_comparison_round_0 <- data_wide %>%
select(Health_state, Archaea, Bacteria,
ends_with(c("_D", "_P", "_O", "_C", "_F", "_G"))) %>%
mutate(across (where (is.numeric), function (x) round (x,0))) %>%
summarise_if (is.numeric, function (x) (wilcox.test(x ~ .$Health_state)$p.value)) %>%
pivot_longer(everything()) %>%
rename (Taxon = name, p_value = value) %>%
filter (p_value <= 0.05 ) %>%
arrange(p_value) %>%
add_column(p_value_holm = p.adjust(.$p_value, "holm")) %>%
add_column(p_value_BH = p.adjust(.$p_value, "BH"))
rbind (
"Количество значимо различающихся таксонов по p_value" = nrow (Wilcox_comparison_round_0),
"Количество значимо различающихся таксонов по p_value_holm" = nrow (Wilcox_comparison_round_0 %>% filter (p_value_holm <= 0.05 )),
"Количество значимо различающихся таксонов по p_value_BH" = nrow (Wilcox_comparison_round_0 %>% filter (p_value_BH <= 0.05 ))
)
## [,1]
## Количество значимо различающихся таксонов по p_value 139
## Количество значимо различающихся таксонов по p_value_holm 65
## Количество значимо различающихся таксонов по p_value_BH 139
Wilcox_comparison_round_1 <- data_wide %>%
select(Health_state, Archaea, Bacteria,
ends_with(c("_D", "_P", "_O", "_C", "_F", "_G"))) %>%
mutate(across (where (is.numeric), function (x) round (x,1))) %>%
summarise_if (is.numeric, function (x) (wilcox.test(x ~ .$Health_state)$p.value)) %>%
pivot_longer(everything()) %>%
rename (Taxon = name, p_value = value) %>%
filter (p_value <= 0.05 ) %>%
arrange(p_value) %>%
add_column(p_value_holm = p.adjust(.$p_value, "holm")) %>%
add_column(p_value_BH = p.adjust(.$p_value, "BH"))
rbind (
"Количество значимо различающихся таксонов по p_value" = nrow (Wilcox_comparison_round_1),
"Количество значимо различающихся таксонов по p_value_holm" = nrow (Wilcox_comparison_round_1 %>% filter (p_value_holm <= 0.05 )),
"Количество значимо различающихся таксонов по p_value_BH" = nrow (Wilcox_comparison_round_1 %>% filter (p_value_BH <= 0.05 ))
)
## [,1]
## Количество значимо различающихся таксонов по p_value 313
## Количество значимо различающихся таксонов по p_value_holm 179
## Количество значимо различающихся таксонов по p_value_BH 313
Wilcox_comparison_round_2 <- data_wide %>%
select(Health_state, Archaea, Bacteria,
ends_with(c("_D", "_P", "_O", "_C", "_F", "_G"))) %>%
mutate(across (where (is.numeric), function (x) round (x,2))) %>%
summarise_if (is.numeric, function (x) (wilcox.test(x ~ .$Health_state)$p.value)) %>%
pivot_longer(everything()) %>%
rename (Taxon = name, p_value = value) %>%
filter (p_value <= 0.05 ) %>%
arrange(p_value) %>%
add_column(p_value_holm = p.adjust(.$p_value, "holm")) %>%
add_column(p_value_BH = p.adjust(.$p_value, "BH"))
rbind (
"Количество значимо различающихся таксонов по p_value" = nrow (Wilcox_comparison_round_2),
"Количество значимо различающихся таксонов по p_value_holm" = nrow (Wilcox_comparison_round_2 %>% filter (p_value_holm <= 0.05 )),
"Количество значимо различающихся таксонов по p_value_BH" = nrow (Wilcox_comparison_round_2 %>% filter (p_value_BH <= 0.05 ))
)
## [,1]
## Количество значимо различающихся таксонов по p_value 633
## Количество значимо различающихся таксонов по p_value_holm 417
## Количество значимо различающихся таксонов по p_value_BH 633
create_and_plot_taxon_groups <- function(filter_pattern, step_size, dataset_name) {
# Получаем уникальные таксоны, удовлетворяющие условиям
unique_taxa <- Wilcox_comparison_round_2 %>%
subset(grepl(filter_pattern, Taxon)) %>%
filter(p_value_holm < 0.05) %>%
distinct(Taxon)
# Разбиваем уникальные таксоны на группы по step_size
taxon_groups <- split(unique_taxa, rep(1:ceiling(nrow(unique_taxa) / step_size), each = step_size, length.out = nrow(unique_taxa)))
# Проходим по группам и строим графики
for (i in seq_along(taxon_groups)) {
current_taxa <- taxon_groups[[i]]
current_dataset <- get(dataset_name) # Получаем датасет по его имени
G_long_filtered <- current_dataset %>%
filter(Taxon %in% current_taxa$Taxon & Percentage > 0) %>%
group_by(Health_state, Taxon) %>%
summarise(Count = n(), .groups = "drop")
bar_plot <- ggplot(G_long_filtered, aes(x = Health_state, y = Count, fill = Health_state == "Disease")) +
geom_bar(position = "dodge", color = "black", stat = "identity") +
scale_fill_manual(values = c("skyblue", "red"), guide = FALSE) +
labs(title = paste("Гистограмма для таксонов", (i - 1) * step_size + 1, "до", min(i * step_size, nrow(unique_taxa)), "в разрезе Health_state"),
x = "Статус пациента",
y = "Количество пациентов, у которых обнаружен данный таксон") +
coord_flip() +
facet_wrap(~Taxon, scales = "free_y", ncol = 2, shrink = 0.7) + # Уменьшение пространства между фасетами
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5), # Поворот текста на оси X и уменьшение шрифта
axis.text.y = element_text(hjust = 1, size = 5),
strip.text = element_text(size = 5)) # Уменьшение шрифта для названия фасет
print(bar_plot) # Отображение графика на экране
}
}
# Пример вызова функции с передачей "_G" в качестве аргумента и названия датасета "G_long"
create_and_plot_taxon_groups("_G", 18, "G_long")
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
create_and_plot_circular_diagrams <- function(dataset, filter_pattern, step_size, filename) {
# Получаем уникальные таксоны, удовлетворяющие условиям
unique_taxa <- Wilcox_comparison_round_2 %>%
subset(grepl(filter_pattern, Taxon)) %>%
filter(p_value_holm < 0.05) %>%
distinct(Taxon)
# Разбиваем уникальные таксоны на группы по step_size
taxon_groups <- split(unique_taxa, rep(1:ceiling(nrow(unique_taxa) / step_size), each = step_size, length.out = nrow(unique_taxa)))
# Проходим по группам и строим графики
for (i in seq_along(taxon_groups)) {
current_taxa <- taxon_groups[[i]]
G_long_filtered <- dataset %>%
filter(Taxon %in% current_taxa$Taxon & Percentage > 0) %>%
group_by(Health_state, Taxon) %>%
summarise(Count = n(), .groups = "drop")
# Добавляем строки с Count = 0 для всех комбинаций Health_state и Taxon
G_long_filtered <- G_long_filtered %>%
complete(Health_state, Taxon, fill = list(Count = 0))
data_id <- G_long_filtered %>% filter(G_long_filtered$Health_state == "Disease") %>%
mutate(id = row_number()) %>%
select(Taxon, id)
G_long_filtered <- left_join(G_long_filtered, data_id, by = "Taxon")
labels_data <- G_long_filtered %>% filter(Health_state == "Disease")
number_of_bars <- nrow(labels_data)
#Вычислим углы для лейбла каждого барра
labels_data$angel <- 90 - 360 * (labels_data$id-0.5) / number_of_bars
# Добавим горизонтольную регулировку
labels_data <- labels_data %>%
mutate(hjust = ifelse(angel < -90, 1, 0))
# Перевернем лейбл в зависимости от "полушария"
labels_data <- labels_data %>%
mutate(angel = ifelse(angel < -90, angel + 180, angel))
# Круговая диаграмма для объединенного датасета
p <- ggplot(data = G_long_filtered, mapping = aes(x = id, y = Count, fill = Health_state)) +
geom_bar(stat = "identity", position = "stack") +
ylim(-150, 500) +
# Добавляем кастомную тему
theme_minimal(base_size = 8) +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1, 6), "cm")) +
coord_polar(start = 0)+
geom_text(data = labels_data,
aes(x = id, y = Count,
label = Taxon,
hjust = hjust),
color = "black",
fontface = "bold",
alpha = 0.9,
size = 1.5,
angle = labels_data$angel,
inherit.aes = FALSE) +
# Добавляем горизонтальные линии сетки
geom_hline(yintercept = seq(0, 300, by = 50),
linetype = "dotted",
color = "gray",
size = 0.5) +
# Добавляем значения рядом с линиями сетки
annotate("text", x = 1.2, y = seq(0, 300, by = 50),
label = seq(0, 300, by = 50),
color = "black",
size = 3,
alpha = 40,
hjust = 0)
p <- p + annotate("text", x = 0.5, y = 490, label = "Сотношение здоровых пациентов и пациентов с СРК",
size = 5, color = "black", fontface = "bold", hjust = 0.5)
# Сформируем имя файла для сохранения
current_filename <- paste0(filename, "_", i, ".png")
# Сохраняем график под уникальным именем
ggsave(current_filename, p, width = 10, height = 8)
print(p)
}
}
# Пример вызова функции
create_and_plot_circular_diagrams(G_long, "_G", 40, "output_plot")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
create_and_plot_boxplots <- function(G_long, filter_pattern, step_size, x_var, title) {
# Получаем уникальные таксоны, удовлетворяющие условиям
unique_taxa <- Wilcox_comparison_round_2 %>%
subset(grepl(filter_pattern, Taxon)) %>%
filter(p_value_holm < 0.05) %>%
distinct(Taxon)
# Разбиваем уникальные таксоны на группы по step_size
taxon_groups <- split(unique_taxa, rep(1:ceiling(nrow(unique_taxa) / step_size), each = step_size, length.out = nrow(unique_taxa)))
# Проходим по группам и строим боксплоты
for (i in seq_along(taxon_groups)) {
current_taxa <- taxon_groups[[i]]
G_long_filtered <- G_long %>%
filter(Taxon %in% current_taxa$Taxon & Percentage > 0)
# Создаем формулу для переменной x_var
formula <- as.formula(paste("Age ~", x_var))
# Строим боксплот
box_plot <- ggplot(G_long_filtered, aes_string(x = x_var, y = "Age", fill = "Health_state")) +
geom_boxplot() +
labs(title = paste(title),
x = x_var,
y = "Age") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8), # Уменьшаем размер шрифта для подписей Taxon
axis.title.x = element_blank(), # Убираем название оси X
legend.position = "bottom") + # Перемещаем легенду вниз
scale_fill_manual(values = c("red", "skyblue")) +
facet_grid(~Taxon, scales = "free_y", space = "free") # Используем facet_grid вместо facet_wrap
# Отображаем график на экране
print(box_plot)
}
}
# Пример вызова функции с Health_state в качестве x_var
create_and_plot_boxplots(G_long, "_G", 4, "Health_state", "Боксплоты для таксонов по статусу: Здоров/СРК")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 540 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 590 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 674 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 778 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 643 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 577 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 554 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 366 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 590 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 445 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 500 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 733 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 555 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 472 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 370 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 384 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 318 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 556 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 377 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 417 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 249 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 634 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 244 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 490 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 533 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 390 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 465 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 236 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 210 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 397 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 268 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 503 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 251 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 353 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 199 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 410 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 429 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 183 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 102 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 414 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 73 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(G_long, "_G", 3, "interaction(Health_state, Sex)", "Боксплоты для таксона (Пол + статус заболевания)")
## Warning: Removed 439 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 444 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 364 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 557 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 547 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 580 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 538 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 216 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 398 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 582 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 175 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 344 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 513 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 423 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 255 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 491 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 653 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 299 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 109 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 337 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 284 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 492 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 237 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 235 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 386 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 206 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 305 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 375 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 241 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 241 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 389 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 323 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 324 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 282 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 214 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 91 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 288 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 335 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 167 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 378 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 232 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 287 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 173 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 111 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 399 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 134 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 327 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 162 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 69 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 369 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 81 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 70 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(G_long, "_G", 3, "interaction(Health_state, Smoking)", "Боксплоты для таксона (Статус курения + статус заболевания)")
## Warning: Removed 439 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 444 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 364 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 557 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 547 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 580 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 538 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 216 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 398 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 582 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 175 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 344 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 513 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 423 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 255 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 491 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 653 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 299 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 109 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 337 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 284 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 492 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 237 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 235 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 386 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 206 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 305 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 375 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 241 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 241 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 389 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 323 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 324 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 282 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 214 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 91 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 288 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 335 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 167 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 378 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 232 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 287 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 173 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 111 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 399 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 134 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 327 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 162 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 69 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 369 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 81 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 70 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(G_long, "_G", 3, "interaction(Health_state, Alcohol)", "Боксплоты для таксона (Статус употребления алкоголя + статус заболевания)")
## Warning: Removed 439 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 444 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 364 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 557 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 547 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 580 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 538 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 216 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 398 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 582 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 175 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 344 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 513 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 423 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 255 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 491 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 653 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 299 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 109 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 337 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 284 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 492 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 237 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 235 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 386 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 206 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 305 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 375 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 241 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 241 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 389 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 323 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 324 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 282 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 214 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 91 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 288 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 335 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 167 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 378 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 232 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 287 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 173 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 111 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 399 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 134 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 327 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 162 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 69 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 369 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 81 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 70 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_taxon_groups("_F", 18, "F_long")
create_and_plot_circular_diagrams(F_long, "_F", 40, "F_output_plot")
create_and_plot_boxplots(F_long, "_F", 4, "Health_state", "Боксплоты для таксонов по статусу: Здоров/СРК")
## Warning: Removed 570 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 721 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 623 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 846 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 729 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 316 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 396 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 451 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 368 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 270 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 257 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 343 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 446 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 300 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 390 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 222 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 330 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 438 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 444 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 225 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 299 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 540 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 607 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 270 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 457 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 260 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 433 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 387 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(F_long, "_F", 3, "interaction(Health_state, Sex)", "Боксплоты для таксона (Пол + статус заболевания)")
## Warning: Removed 454 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 565 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 410 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 485 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 601 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 538 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 554 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 198 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 351 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 215 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 332 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 224 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 240 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 136 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 270 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 411 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 224 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 157 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 344 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 183 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 313 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 243 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 251 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 373 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 145 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 169 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 281 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 324 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 667 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 248 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 178 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 210 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 418 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 202 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 320 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 387 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(F_long, "_F", 3, "interaction(Health_state, Smoking)", "Боксплоты для таксона (Статус курения + статус заболевания)")
## Warning: Removed 454 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 565 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 410 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 485 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 601 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 538 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 554 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 198 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 351 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 215 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 332 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 224 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 240 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 136 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 270 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 411 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 224 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 157 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 344 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 183 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 313 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 243 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 251 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 373 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 145 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 169 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 281 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 324 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 667 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 248 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 178 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 210 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 418 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 202 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 320 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 387 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(F_long, "_F", 3, "interaction(Health_state, Alcohol)", "Боксплоты для таксона (Статус употребления алкоголя + статус заболевания)")
## Warning: Removed 454 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 565 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 410 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 485 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 601 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 538 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 554 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 198 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 351 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 215 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 332 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 224 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 240 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 136 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 270 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 411 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 224 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 157 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 344 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 183 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 313 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 243 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 251 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 373 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 145 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 169 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 281 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 324 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 667 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 248 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 178 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 210 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 418 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 202 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 320 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 387 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_taxon_groups("_C", 18, "C_long")
create_and_plot_circular_diagrams(C_long, "_C", 40, "F_output_plot")
create_and_plot_boxplots(C_long, "_C", 4, "Health_state", "Боксплоты для таксонов по статусу: Здоров/СРК")
## Warning: Removed 822 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 484 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 245 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 390 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 201 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 199 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 144 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 342 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 103 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 510 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(C_long, "_C", 3, "interaction(Health_state, Sex)", "Боксплоты для таксона (Пол + статус заболевания)")
## Warning: Removed 588 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 406 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 375 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 182 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 155 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 331 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 146 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 158 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 118 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 306 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 96 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 69 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(C_long, "_C", 3, "interaction(Health_state, Smoking)", "Боксплоты для таксона (Статус курения + статус заболевания)")
## Warning: Removed 588 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 406 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 375 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 182 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 155 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 331 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 146 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 158 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 118 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 306 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 96 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 69 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(C_long, "_C", 3, "interaction(Health_state, Alcohol)", "Боксплоты для таксона (Статус употребления алкоголя + статус заболевания)")
## Warning: Removed 588 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 406 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 375 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 182 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 155 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 331 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 146 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 158 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 118 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 306 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 96 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 69 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 460 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_taxon_groups("_O", 18, "O_long")
create_and_plot_circular_diagrams(O_long, "_O", 30, "F_output_plot")
create_and_plot_boxplots(O_long, "_O", 4, "Health_state", "Боксплоты для таксонов по статусу: Здоров/СРК")
## Warning: Removed 611 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 747 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 817 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 525 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 628 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 505 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 306 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 365 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 308 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 164 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 581 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 284 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 174 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 200 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 311 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 277 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 95 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 338 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 62 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 582 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(O_long, "_O", 3, "interaction(Health_state, Sex)", "Боксплоты для таксона (Пол + статус заболевания)")
## Warning: Removed 515 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 555 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 535 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 570 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 320 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 513 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 484 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 341 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 261 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 334 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 144 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 240 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 130 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 500 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 168 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 231 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 121 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 195 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 82 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 287 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 255 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 78 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 60 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 112 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 535 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(O_long, "_O", 3, "interaction(Health_state, Smoking)", "Боксплоты для таксона (Статус курения + статус заболевания)")
## Warning: Removed 515 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 555 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 535 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 570 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 320 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 513 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 484 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 341 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 261 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 334 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 144 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 240 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 130 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 500 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 168 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 231 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 121 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 195 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 82 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 287 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 255 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 78 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 60 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 112 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 535 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(O_long, "_O", 3, "interaction(Health_state, Alcohol)", "Боксплоты для таксона (Статус употребления алкоголя + статус заболевания)")
## Warning: Removed 515 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 555 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 535 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 570 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 320 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 513 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 484 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 341 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 261 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 334 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 144 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 240 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 130 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 500 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 168 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 231 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 121 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 195 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 82 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 287 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 255 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 78 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 60 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 317 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 112 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 535 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_taxon_groups("_P", 18, "P_long")
create_and_plot_circular_diagrams(P_long, "_P", 30, "P_output_plot")
create_and_plot_boxplots(P_long, "_P", 4, "Health_state", "Боксплоты для таксонов по статусу: Здоров/СРК")
## Warning: Removed 742 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 373 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 510 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 161 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 41 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(P_long, "_P", 3, "interaction(Health_state, Sex)", "Боксплоты для таксона (Пол + статус заболевания)")
## Warning: Removed 660 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 366 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 336 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 263 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 112 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 90 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(P_long, "_P", 3, "interaction(Health_state, Smoking)", "Боксплоты для таксона (Статус курения + статус заболевания)")
## Warning: Removed 660 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 366 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 336 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 263 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 112 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 90 rows containing non-finite values (`stat_boxplot()`).
create_and_plot_boxplots(P_long, "_P", 3, "interaction(Health_state, Alcohol)", "Боксплоты для таксона (Статус употребления алкоголя + статус заболевания)")
## Warning: Removed 660 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 366 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 336 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 263 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 112 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 90 rows containing non-finite values (`stat_boxplot()`).
perform_permutation_test <- function(data, filter_pattern) {
combined_bacteria_G <- data %>%
select(Health_state, ends_with(filter_pattern))
d <- dist(combined_bacteria_G) # euclidean distances between the rows
fit <- cmdscale(d, eig=TRUE, k=2) # k is the number of dim
df_mds <- data.frame(
x = fit$points[,1],
y = fit$points[,2]
)
df_full <- cbind(df_mds, combined_info) %>%
mutate(Health_state_n = case_when(Health_state == "Health" ~ 0,
Health_state == "Disease" ~ 1))
msd_plot <- ggplot(df_full, aes(x = x, y = y, color = Health_state)) +
geom_point() +
theme_bw() +
ggtitle("Распределение вектора таксонов в зависимости от группы пациентов")
print(msd_plot)
print("Используя метод пермутаций проверим отличаются ли группы в зависимости от Health_state")
adonis2(d ~ Health_state_n, data = df_full)
}
# Пример вызова функции с использованием другого фильтра
perform_permutation_test(combined_bacteria_clean, "_G")
## Warning in dist(combined_bacteria_G): в результате преобразования созданы NA
## [1] "Используя метод пермутаций проверим отличаются ли группы в зависимости от Health_state"
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = d ~ Health_state_n, data = df_full)
## Df SumOfSqs R2 F Pr(>F)
## Health_state_n 1 2519 0.02181 8.4505 0.001 ***
## Residual 379 112971 0.97819
## Total 380 115490 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perform_permutation_test(combined_bacteria_clean, "_F")
## Warning in dist(combined_bacteria_G): в результате преобразования созданы NA
## [1] "Используя метод пермутаций проверим отличаются ли группы в зависимости от Health_state"
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = d ~ Health_state_n, data = df_full)
## Df SumOfSqs R2 F Pr(>F)
## Health_state_n 1 4139 0.0227 8.8047 0.001 ***
## Residual 379 178176 0.9773
## Total 380 182315 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perform_permutation_test(combined_bacteria_clean, "_C")
## Warning in dist(combined_bacteria_G): в результате преобразования созданы NA
## [1] "Используя метод пермутаций проверим отличаются ли группы в зависимости от Health_state"
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = d ~ Health_state_n, data = df_full)
## Df SumOfSqs R2 F Pr(>F)
## Health_state_n 1 3161 0.01019 3.9015 0.018 *
## Residual 379 307043 0.98981
## Total 380 310204 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perform_permutation_test(combined_bacteria_clean, "_O")
## Warning in dist(combined_bacteria_G): в результате преобразования созданы NA
## [1] "Используя метод пермутаций проверим отличаются ли группы в зависимости от Health_state"
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = d ~ Health_state_n, data = df_full)
## Df SumOfSqs R2 F Pr(>F)
## Health_state_n 1 3335 0.01602 6.1714 0.002 **
## Residual 379 204808 0.98398
## Total 380 208143 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perform_permutation_test(combined_bacteria_clean, "_P")
## Warning in dist(combined_bacteria_G): в результате преобразования созданы NA
## [1] "Используя метод пермутаций проверим отличаются ли группы в зависимости от Health_state"
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = d ~ Health_state_n, data = df_full)
## Df SumOfSqs R2 F Pr(>F)
## Health_state_n 1 3746 0.01138 4.3612 0.014 *
## Residual 379 325550 0.98862
## Total 380 329297 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
clusters_number <- 2 #количество кластеров
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
G_scaled <- data_wide %>%
column_to_rownames("patient_ID") %>%
select (ends_with("_G")) %>% #выбираем только G-таксоны
scale () #нормируем данные
agnes <- G_scaled %>%
dist (method = "euclidean") %>% #cоздаём матрицу дистанций
as.matrix() %>%
agnes( #выбираем Agglomerative clustering (иерархическая кластеризация снизу вверх)
stand = F, #стандартизируем данныe
metric = "euclidean",
method = "ward")
fviz_dend(agnes,
cex = 0.5, k = clusters_number,
rect = TRUE,
k_colors = "jco",
show_labels = FALSE,
main = "Дендрограмма кластеризации субъектов по количеству G-таксонов",
ylab = "Высота",)
## Registered S3 method overwritten by 'dendextend':
## method from
## rev.hclust vegan
data_wide %>%
add_column("Cluster" = cutree(agnes, k = clusters_number)) %>% # Создаём новый столбец - вектор принадлежности к кластерам
select(2:19, Cluster) %>%
tbl_summary(digits = list(all_continuous() ~ c(0, 0),
all_categorical() ~ c(0, 0)),
by = Cluster) %>%
add_p()
| Characteristic | 1, N = 1951 | 2, N = 1861 | p-value2 |
|---|---|---|---|
| research_ID | <0.001 | ||
| 1 | 46 (24%) | 0 (0%) | |
| 2 | 14 (7%) | 22 (12%) | |
| 3 | 48 (25%) | 22 (12%) | |
| 4 | 37 (19%) | 50 (27%) | |
| 5 | 25 (13%) | 0 (0%) | |
| 6 | 13 (7%) | 9 (5%) | |
| 7 | 12 (6%) | 83 (45%) | |
| Seq_region | <0.001 | ||
| V3-V4 | 62 (32%) | 44 (24%) | |
| V4 | 87 (45%) | 142 (76%) | |
| V5-V6 | 46 (24%) | 0 (0%) | |
| Seq_date | <0.001 | ||
| 2015 | 12 (6%) | 83 (45%) | |
| 2017 | 25 (13%) | 0 (0%) | |
| 2018 | 67 (34%) | 29 (16%) | |
| 2020 | 15 (8%) | 24 (13%) | |
| 2021 | 15 (8%) | 17 (9%) | |
| 2022 | 13 (7%) | 10 (5%) | |
| 2023 | 48 (25%) | 23 (12%) | |
| Health_state | <0.001 | ||
| Disease | 59 (30%) | 111 (60%) | |
| Health | 136 (70%) | 75 (40%) | |
| Age | 33 (27, 47) | 35 (28, 48) | 0.9 |
| Unknown | 120 | 127 | |
| Age_range | 0.3 | ||
| > 43 | 25 (14%) | 19 (18%) | |
| 16-43 | 158 (86%) | 84 (82%) | |
| Unknown | 12 | 83 | |
| Sex | 0.007 | ||
| female | 57 (49%) | 22 (29%) | |
| male | 59 (51%) | 53 (71%) | |
| Unknown | 79 | 111 | |
| Country | |||
| Australia | 3 (2%) | 3 (2%) | |
| Austria | 26 (13%) | 2 (1%) | |
| Belgium | 46 (24%) | 0 (0%) | |
| Germany | 20 (10%) | 25 (13%) | |
| Greece | 1 (1%) | 0 (0%) | |
| Hungary | 2 (1%) | 0 (0%) | |
| Ireland | 0 (0%) | 1 (1%) | |
| Israel | 14 (7%) | 9 (5%) | |
| Italy | 14 (7%) | 23 (12%) | |
| Norway | 0 (0%) | 1 (1%) | |
| Poland | 48 (25%) | 22 (12%) | |
| Spain | 12 (6%) | 83 (45%) | |
| UK | 4 (2%) | 8 (4%) | |
| USA | 5 (3%) | 9 (5%) | |
| Race | 0.089 | ||
| African American | 1 (1%) | 0 (0%) | |
| Asian or Pacific Islander | 0 (0%) | 1 (2%) | |
| Caucasian | 76 (93%) | 40 (83%) | |
| Hispanic | 1 (1%) | 0 (0%) | |
| Other | 4 (5%) | 7 (15%) | |
| Unknown | 113 | 138 | |
| Smoking | 9 (7%) | 7 (10%) | 0.5 |
| Unknown | 64 | 114 | |
| Alcohol | 0.3 | ||
| Never | 4 (11%) | 8 (16%) | |
| Occasionally (1-2 times/week) | 13 (35%) | 11 (22%) | |
| Rarely (a few times/month) | 16 (43%) | 20 (40%) | |
| Regularly (3-7 times/week) | 4 (11%) | 11 (22%) | |
| Unknown | 158 | 136 | |
| Antibiotics_usage | <0.001 | ||
| 1-6 months | 48 (41%) | 0 (0%) | |
| 12 months/Not use | 68 (59%) | 61 (100%) | |
| Unknown | 79 | 125 | |
| Physical_activity | 9 (7%) | 7 (10%) | 0.5 |
| Unknown | 64 | 114 | |
| Travel_period | 0.4 | ||
| 3 months | 9 (25%) | 9 (18%) | |
| 6 months | 2 (6%) | 8 (16%) | |
| Month | 7 (19%) | 6 (12%) | |
| Year | 18 (50%) | 27 (54%) | |
| Unknown | 159 | 136 | |
| Education_level | 0.8 | ||
| Bachelor's level | 13 (38%) | 18 (36%) | |
| Master's level | 10 (29%) | 18 (36%) | |
| School Level | 11 (32%) | 14 (28%) | |
| Unknown | 161 | 136 | |
| Cosmetics | 0.2 | ||
| Never | 18 (49%) | 26 (53%) | |
| Occasionally (a few-8 times/month) | 5 (14%) | 12 (24%) | |
| Regularly (3-7 times/week) | 14 (38%) | 11 (22%) | |
| Unknown | 158 | 137 | |
| Sleep_duration | 0.5 | ||
| > 8 hours | 4 (11%) | 5 (10%) | |
| 4-6 hours | 5 (14%) | 4 (8%) | |
| 6-7 hours | 15 (41%) | 16 (32%) | |
| 7-8 hours | 13 (35%) | 25 (50%) | |
| Unknown | 158 | 136 | |
| BMI_category | 0.8 | ||
| normal/overweight | 139 (98%) | 148 (98%) | |
| obese | 3 (2%) | 2 (1%) | |
| underweight | 0 (0%) | 1 (1%) | |
| Unknown | 53 | 35 | |
| 1 n (%); Median (IQR) | |||
| 2 Pearson’s Chi-squared test; Wilcoxon rank sum test; Fisher’s exact test | |||
G_scaled %>%
kmeans(centers = clusters_number, # Кол-во центроидов-кластеров
iter.max = 20, # Максимальное кол-во итераций
nstart = 35) %>% # Кол-во центройдов в начале
fviz_cluster(data = G_scaled,
palette = c("#2E9FDF", "#E7B800"),
ellipse = FALSE,
show.clust.cent = FALSE,
geom = "point",
main = "Кластеризация субъектов по G-таксонам")
G_scaled %>%
fviz_nbclust(hcut, method = "wss")
G_scaled %>%
fviz_nbclust(hcut, method = "silhouette")
dbscan::kNNdistplot(G_scaled,
minPts = 20) # мин. количество субъектов в кластере
abline(h=37, col = "red", lty = 2)
G_scaled %>%
dbscan::dbscan(eps = 37, # радиус
minPts = 20) %>% # мин. количество субъектов в кластере
fviz_cluster(
data = G_scaled,
stand = TRUE,
ellipse = FALSE,
show.clust.cent = FALSE,
geom = "point",
palette = "jco",
ggtheme = theme_classic())